# home *** CD-ROM | disk | FTP | other *** search
# Wrap
# Source Generated with Decompyle++ # File: in.pyc (Python 2.6) import os import os.path as path from sys import stderr import sqlite3 import tabdict import uuid import time import re patt_r = re.compile('c([ea])(\\d):(.*)') patt_p = re.compile('p(-{0,1}\\d)(\\d)') class tabsqlitedb: '''Phrase database for tables''' def __init__(self, name = 'table.db', user_db = None, filename = None): self.parse = tabdict.parse self.deparse = tabdict.deparse self._add_phrase_sqlstr = '' self.old_phrases = [] self.ime_property_cache = { } if filename: self.db = sqlite3.connect(filename) else: try: os.system('cat %s > /dev/null' % name) except: pass self.db = sqlite3.connect(name) try: self.db.execute('PRAGMA page_size = 8192; ') self.db.execute('PRAGMA cache_size = 20000; ') self.db.execute('PRAGMA temp_store = MEMORY; ') self.db.execute('PRAGMA synchronous = OFF; ') except: print 'encountering error when init db' self.db.executescript('CREATE TABLE IF NOT EXISTS main.ime (attr TEXT, val TEXT);') if not self.db.execute('SELECT val FROM main.ime WHERE attr="name";').fetchall(): ime_keys = { 'name': '', 'name.zh_cn': '', 'name.zh_hk': '', 'name.zh_tw': '', 'author': 'somebody', 'uuid': '%s' % uuid.uuid4(), 'serial_number': '%s' % time.strftime('%Y%m%d'), 'icon': 'ibus-table.svg', 'license': 'LGPL', 'languages': '', 'valid_input_chars': 'abcdefghijklmnopqrstuvwxyz', 'max_key_length': '4', 'status_prompt': '', 'def_full_width_punct': 'TRUE', 'def_full_width_letter': 'FALSE', 'user_can_define_phrase': 'FALSE', 'pinyin_mode': 'FALSE', 'dynamic_adjust': 'FALSE', 'auto_commit': 'false', 'description': 'A IME under IBus Table', 'layout': 'us', 'rules': '' } for _name in ime_keys: sqlstr = 'INSERT INTO main.ime (attr,val) VALUES (?,?);' self.db.execute(sqlstr, (_name, ime_keys[_name])) self._mlen = int(self.get_ime_property('max_key_length')) self._is_chinese = self.is_chinese() self._set_add_phrase_sqlstr() self._pt_index = [ 'id', 'mlen', 'clen'] for i in range(self._mlen): 
self._pt_index.append('m%d' % i) if self._is_chinese: self._pt_index += [ 'category'] self._pt_index += [ 'phrase', 'freq', 'user_freq'] self.user_can_define_phrase = self.get_ime_property('user_can_define_phrase') if self.user_can_define_phrase: if self.user_can_define_phrase.lower() == u'true': self.user_can_define_phrase = True else: self.user_can_define_phrase = False else: print 'Could not find "user_can_define_phrase" entry from database, is it a outdated database?' self.user_can_define_phrase = False self.dynamic_adjust = self.get_ime_property('dynamic_adjust') if self.dynamic_adjust: if self.dynamic_adjust.lower() == u'true': self.dynamic_adjust = True else: self.dynamic_adjust = False else: print 'Could not find "dynamic_adjust" entry from database, is it a outdated database?' self.dynamic_adjust = False self.rules = self.get_rules() self.pkeylens = [] if self.rules: self.pkeylens = self.phrase_keys_len() self._goucima = { } if filename: return None try: self.db.execute('ATTACH DATABASE "%s" AS user_db;' % user_db) except: None if user_db != None else filename print >>stderr, 'The user database was damaged. We will recreate it!' os.rename(user_db, '%s.%d' % (user_db, os.getpid())) self.init_user_db(user_db) self.db.execute('ATTACH DATABASE "%s" AS user_db;' % user_db) self.create_tables('user_db') if self.old_phrases: phrases = filter((lambda x: x[0] > 1), self.old_phrases) phrases = (map,)((lambda x: [ self.parse_phrase_to_tabkeys(x[1])] + list(x[1:])), phrases) map(self.u_add_phrase, phrases) self.db.commit() self.create_indexes('user_db', commit = False) self.generate_userdb_desc() mudb = ':memory:' self.db.execute('ATTACH DATABASE "%s" AS mudb;' % mudb) self.create_tables('mudb') def update_phrase(self, entry, database = 'user_db'): '''update phrase freqs''' _con = [ entry[-1]] + list(entry[1:3 + entry[1]]) + [ entry[-3]] _condition = u''.join(map((lambda x: 'AND m%d = ? ' % x), range(entry[1]))) sqlstr = 'UPDATE %s.phrases SET user_freq = ? 
WHERE mlen = ? AND clen = ? %s AND phrase = ?;' % (database, _condition) self.db.execute(sqlstr, _con) self.db.commit() def sync_usrdb(self): mudata = self.db.execute('SELECT * FROM mudb.phrases;').fetchall() data_u = filter((lambda x: x[-2] in (1, -3)), mudata) data_a = filter((lambda x: x[-2] == 2), mudata) data_n = filter((lambda x: x[-2] == -2), mudata) data_a = (map,)((lambda x: (u''.join(map(self.deparse, x[3:3 + x[1]])), x[-3], 0, x[-1])), data_a) data_n = (map,)((lambda x: (u''.join(map(self.deparse, x[3:3 + x[1]])), x[-3], -1, x[-1])), data_n) map(self.update_phrase, data_u) map(self.u_add_phrase, data_a) map(self.u_add_phrase, data_n) self.db.commit() def is_chinese(self): _tabsqlitedb__lang = self.get_ime_property('languages') if _tabsqlitedb__lang: _tabsqlitedb__langs = _tabsqlitedb__lang.split(',') for _l in _tabsqlitedb__langs: if _l.lower().find('zh') != -1: return True return False def create_tables(self, database): '''Create tables that contain all phrase''' try: self.db.execute('PRAGMA cache_size = 20000; ') except: pass if database == 'main': sqlstr = 'CREATE TABLE IF NOT EXISTS %s.ikeys (ikey TEXT PRIMARY KEY, id INTEGER);' % database self.db.execute(sqlstr) sqlstr = 'CREATE TABLE IF NOT EXISTS %s.goucima (zi TEXT PRIMARY KEY' % database for i in range(self._mlen): sqlstr += ', g%d INTEGER' % i sqlstr += ');' self.db.execute(sqlstr) sqlstr = 'CREATE TABLE IF NOT EXISTS %s.pinyin ( plen INTEGER, ' % database sqlstr += ''.join(map((lambda x: 'p%d INTEGER, ' % x), range(7))) sqlstr += 'zi TEXT, freq INTEGER);' self.db.execute(sqlstr) sqlstr = 'CREATE TABLE IF NOT EXISTS %s.phrases (id INTEGER PRIMARY KEY AUTOINCREMENT, mlen INTEGER, clen INTEGER, ' % database sqlstr += ''.join(map((lambda x: 'm%d INTEGER, ' % x), range(self._mlen))) if self._is_chinese: sqlstr += 'category INTEGER, ' sqlstr += 'phrase TEXT, freq INTEGER, user_freq INTEGER);' self.db.execute(sqlstr) self.db.commit() def update_ime(self, attrs): '''Update attributes in ime table, 
attrs is a iterable object Like [(attr,val), (attr,val), ...] ''' sqlstr = 'UPDATE main.ime SET val = ? WHERE attr = ?;' for attr, val in attrs: _sqlstr = 'SELECT * from main.ime WHERE attr = ?' res = self.db.execute(_sqlstr, (attr,)).fetchall() if res: self.db.execute(sqlstr, (val, attr)) continue self.ime_property_cache = { } self._mlen = int(self.get_ime_property('max_key_length')) self._is_chinese = self.is_chinese() self._set_add_phrase_sqlstr() self._pt_index = [ 'id', 'mlen', 'clen'] for i in range(self._mlen): self._pt_index.append('m%d' % i) if self._is_chinese: self._pt_index += [ 'category'] self._pt_index += [ 'phrase', 'freq', 'user_freq'] self.user_can_define_phrase = self.get_ime_property('user_can_define_phrase') if self.user_can_define_phrase: if self.user_can_define_phrase.lower() == u'true': self.user_can_define_phrase = True else: self.user_can_define_phrase = False else: print 'Could not find "user_can_define_phrase" entry from database, is it a outdated database?' self.user_can_define_phrase = False self.rules = self.get_rules() self.db.commit() def get_rules(self): '''Get phrase construct rules''' rules = { } if self.user_can_define_phrase: try: _rules = self.get_ime_property('rules') if _rules: _rules = _rules.strip().split(';') for rule in _rules: res = patt_r.match(rule) if res: cms = [] if res.group(1) == 'a': rules['above'] = int(res.group(2)) _cms = res.group(3).split('+') if len(_cms) > int(self.get_ime_property('max_key_length')): print 'rule: "%s" over max key length' % rule break for _cm in _cms: cm_res = patt_p.match(_cm) cms.append((int(cm_res.group(1)), int(cm_res.group(2)))) rules[int(res.group(2))] = cms continue print 'not a legal rule: "%s"' % rule except Exception: import traceback traceback.print_exc() return rules return '' def phrase_keys_len(self): '''Return the phrase possible key length''' max_len = self.rules['above'] try: return (map,)((lambda x: len(self.rules[x])), range(2, max_len + 1))[:] except: return None def 
get_no_check_chars(self): '''Get the characters which engine should not change freq''' _chars = self.get_ime_property('no_check_chars') try: _chars = _chars.decode('utf-8') except: pass return _chars def add_phrases(self, phrases, database = 'main'): '''Add phrases to database, phrases is a iterable object Like: [(tabkeys, phrase, freq ,user_freq), (tabkeys, phrase, freq, user_freq), ...] ''' map(self.add_phrase, phrases, [ database] * len(phrases), [ False] * len(phrases)) self.db.commit() def add_new_phrases(self, nphrases, database = 'main'): '''Add new phrases into db, new phrases is a object of [(phrase,freq), (phrase,freq),...]''' n_phrases = [] for _ph, _freq in nphrases: try: _tabkey = self.parse_phrase_to_tabkeys(_ph) if not self.check_phrase_internal(_ph, _tabkey, database): n_phrases.append((_tabkey, _ph, _freq, 0)) continue print '"%s" would not been added' % _ph continue if n_phrases: self.add_phrases(n_phrases, database) def u_add_phrase(self, nphrase): '''Add a phrase to userdb''' self.add_phrase(nphrase, database = 'user_db', commit = False) def _set_add_phrase_sqlstr(self): '''Create the sqlstr for add phrase according to self._mlen.''' sqlstr = 'INSERT INTO %s.phrases ( mlen, clen, ' sql_suffix = 'VALUES ( ?, ?, ' mmlen = range(self._mlen) sqlstr += ''.join(map((lambda x: 'm%d, ' % x), mmlen)) sql_suffix += ''.join(map((lambda x: '?, '), mmlen)) if self._is_chinese: sqlstr += 'category, ' sql_suffix += '?, ' sqlstr += 'phrase, freq, user_freq) ' sql_suffix += '?, ?, ? 
);' sqlstr += sql_suffix self._add_phrase_sqlstr = sqlstr def add_phrase(self, aphrase, database = 'main', commit = True): '''Add phrase to database, phrase is a object of (tabkeys, phrase, freq ,user_freq) ''' sqlstr = self._add_phrase_sqlstr try: (tabkeys, phrase, freq, user_freq) = aphrase except: (tabkeys, phrase, freq) = aphrase user_freq = 0 if self._is_chinese: category = 0 if type(phrase) != type(u''): phrase = phrase.decode('utf8') try: phrase.encode('gb2312') category |= 1 except: if '\xe3\x80\x87'.decode('utf8') in phrase: category |= 1 try: phrase.encode('big5hkscs') category |= 2 except: if category & 1: pass else: try: phrase.encode('gbk') category |= 1 if not category & (1 | 2): category |= 4 try: tbks = self.parse(tabkeys) if len(tbks) != len(tabkeys): print 'In %s %s: we parse tabkeys fail' % (phrase, tabkeys) return None record = [ None] * (5 + self._mlen) record[0] = len(tabkeys) record[1] = len(phrase) record[2:2 + len(tabkeys)] = (map,)((lambda x: tbks[x].get_key_id()), range(0, len(tabkeys))) if self._is_chinese: record += [ None] record[-4] = category record[-3:] = (phrase, freq, user_freq) self.db.execute(sqlstr % database, record) if commit: self.db.commit() except Exception: import traceback traceback.print_exc() def add_goucima(self, gcms): '''Add goucima into database, gcms is iterable object Like gcms = [(zi,goucima),(zi,goucima), ...] ''' count = 1 for zi, gcm in gcms: _con = '' _val = '' _len = min(len(gcm), self._mlen) for i in range(_len): _con += ', g%d' % i _val += ', ?' sqlstr = 'INSERT INTO main.goucima ( zi %s )\n VALUES ( ? 
%s );' % (_con, _val) try: gc = self.parse(gcm) if len(gc) != len(gcm): error_m = u'%s %s: Can not parse goucima' % (zi, gcm) raise Exception(error_m.encode('utf8')) len(gc) != len(gcm) record = [ zi] for i in range(_len): record.append(gc[i].get_key_id()) self.db.execute(sqlstr, record) except Exception: import traceback traceback.print_exc() count += 1 self.db.commit() def add_pinyin(self, pinyins, database = 'main'): '''Add pinyin to database, pinyins is a iterable object Like: [(zi,pinyin, freq), (zi, pinyin, freq), ...] ''' sqlstr = 'INSERT INTO %s.pinyin ( plen, ' sql_suffix = 'VALUES ( ?, ' for i in range(7): sqlstr += 'p%d, ' % i sql_suffix += '?, ' sqlstr += 'zi, freq ) ' sql_suffix += '?, ? );' sqlstr += sql_suffix count = 1 for pinyin, zi, freq in pinyins: try: pinyin_n = pinyin.replace('1', '!').replace('2', '@').replace('3', '#').replace('4', '$').replace('5', '%') py = self.parse(pinyin_n) if len(py) != len(pinyin_n): error_m = u'%s %s: Can not parse pinyin' % (zi, pinyin) raise Exception(error_m.encode('utf8')) len(py) != len(pinyin_n) record = [ None] * 10 record[0] = len(pinyin_n) for i in range(0, len(pinyin_n)): record[1 + i] = py[i].get_key_id() record[-2] = zi record[-1] = freq self.db.execute(sqlstr % database, record) except Exception: print count, ': ', zi.encode('utf8'), ' ', pinyin import traceback traceback.print_exc() count += 1 self.db.commit() def optimize_database(self, database = 'main'): sqlstr = '\n CREATE TABLE tmp AS SELECT * FROM %(database)s.phrases;\n DELETE FROM %(database)s.phrases;\n INSERT INTO %(database)s.phrases SELECT * FROM tmp ORDER BY\n %(tabkeystr)s mlen ASC, user_freq DESC, freq DESC, id ASC;\n DROP TABLE tmp;\n CREATE TABLE tmp AS SELECT * FROM %(database)s.goucima;\n DELETE FROM %(database)s.goucima;\n INSERT INTO %(database)s.goucima SELECT * FROM tmp ORDER BY zi,g0,g1;\n DROP TABLE tmp;\n CREATE TABLE tmp AS SELECT * FROM %(database)s.pinyin;\n DELETE FROM %(database)s.pinyin;\n INSERT INTO %(database)s.pinyin 
SELECT * FROM tmp ORDER BY p0,p1,p2,p3,p4,p5,plen ASC;\n DROP TABLE tmp;\n ' tabkeystr = '' for i in range(self._mlen): tabkeystr += 'm%d, ' % i self.db.executescript(sqlstr % { 'database': database, 'tabkeystr': tabkeystr }) self.db.executescript('VACUUM;') self.db.commit() def drop_indexes(self, database): """Drop the index in database to reduce it's size""" sqlstr = '\n DROP INDEX IF EXISTS %(database)s.goucima_index_z;\n DROP INDEX IF EXISTS %(database)s.pinyin_index_i;\n DROP INDEX IF EXISTS %(database)s.phrases_index_p;\n DROP INDEX IF EXISTS %(database)s.phrases_index_i;\n VACUUM; \n ' % { 'database': database } self.db.executescript(sqlstr) self.db.commit() def create_indexes(self, database, commit = True): sqlstr = '\n DROP INDEX IF EXISTS %(database)s.goucima_index_z;\n CREATE INDEX IF NOT EXISTS %(database)s.goucima_index_z ON goucima (zi);\n DROP INDEX IF EXISTS %(database)s.pinyin_index_i;\n CREATE INDEX IF NOT EXISTS %(database)s.pinyin_index_i ON pinyin (p0,p1,p2,p3,p4,p5,plen ASC, freq DESC);\n VACUUM; \n ' % { 'database': database } sqlstr_t = '\n DROP INDEX IF EXISTS %(database)s.phrases_index_p;\n CREATE INDEX IF NOT EXISTS %(database)s.phrases_index_p ON phrases\n (%(tabkeystr)s mlen ASC, freq DESC, id ASC);\n DROP INDEX IF EXISTS %(database)s.phrases_index_i;\n CREATE INDEX IF NOT EXISTS %(database)s.phrases_index_i ON phrases (phrase, mlen ASC);\n ' tabkeystr = '' for i in range(self._mlen): tabkeystr += 'm%d,' % i if database == 'main': sqlstr = sqlstr_t % { 'database': database, 'tabkeystr': tabkeystr } + sqlstr else: sqlstr = sqlstr_t % { 'database': database, 'tabkeystr': tabkeystr } self.db.executescript(sqlstr) if commit: self.db.commit() def compare(self, x, y): if not cmp(x[1], y[1]) and -cmp(x[-1], y[-1]) and -cmp(x[-2], y[-2]): pass return cmp(x[0], y[0]) def select_words(self, tabkeys, onechar = False, bitmask = 0): ''' Get phrases from database by tab_key objects ( which should be equal or less than the max key length) This method 
is called in table.py by passing UserInput held data Return result[:] ''' _len = min(len(tabkeys), self._mlen) _condition = '' _condition += ''.join(map((lambda x: 'AND m%d = ? ' % x), range(_len))) if onechar: _condition += 'AND clen=1 ' if bitmask: all_ints = xrange(1, 5) need_ints = (filter,)((lambda x: x & bitmask), all_ints) bit_condition = 'OR'.join(map((lambda x: ' category = %d ' % x), need_ints)) _condition += 'AND (%s) ' % bit_condition w_len = (self._mlen - _len) + 1 x_len = 2 while x_len <= w_len + 1: sqlstr = 'SELECT * FROM (SELECT * FROM main.phrases WHERE mlen < %(mk)d %(condition)s \n UNION ALL\n SELECT * FROM user_db.phrases WHERE mlen < %(mk)d %(condition)s \n UNION ALL\n SELECT * FROM mudb.phrases WHERE mlen < %(mk)d %(condition)s )\n ORDER BY mlen ASC, user_freq DESC, freq DESC, id ASC;' % { 'mk': _len + x_len, 'condition': _condition } _tabkeys = map(int, tabkeys[:_len]) _tabkeys += _tabkeys + _tabkeys result = self.db.execute(sqlstr, _tabkeys).fetchall() if len(result) > 0: break x_len += 1 sysdb = { } usrdb = { } mudb = { } _cand = [] searchres = map((lambda res: [ int(res[-2]), int(res[-1]), [ (res[1:-2], [ res[:-1], res[-1:]])]]), result) reslist = filter((lambda x: not x[1]), searchres) (map,)((lambda x: sysdb.update(x[2])), reslist) reslist = filter((lambda x: if x[0] in (0, -1): passx[1]), searchres) (map,)((lambda x: usrdb.update(x[2])), reslist) reslist = filter((lambda x: if x[0] not in (0, -1): passx[1]), searchres) (map,)((lambda x: mudb.update(x[2])), reslist) searchres = (map,)((lambda key: mudb[key][0] + mudb[key][1]), mudb) map(_cand.append, searchres) searchres = (None, map)((lambda key: if not not mudb.has_key(key) or usrdb[key][0] + usrdb[key][1]: pass), usrdb) searchres = filter((lambda x: bool(x)), searchres) map(_cand.append, searchres) searchres = (None, None, map)((lambda key: if not not mudb.has_key(key) or not usrdb.has_key(key) or sysdb[key][0] + sysdb[key][1]: pass), sysdb) searchres = filter((lambda x: bool(x)), 
searchres) map(_cand.append, searchres) _cand.sort(cmp = self.compare) return _cand[:] def select_zi(self, tabkeys): ''' Get zi from database by tab_key objects ( which should be equal or less than 6) This method is called in table.py by passing UserInput held data Return result[:] ''' _len = min(len(tabkeys), 7) _condition = '' _condition += ''.join(map((lambda x: 'AND p%d = ? ' % x), range(_len))) if _len < 7: if _len < 3: x_len = 3 else: x_len = _len + 1 else: x_len = _len while x_len < 8: sqlstr = 'SELECT * FROM main.pinyin WHERE plen < %(mk)d %(condition)s \n ORDER BY plen ASC, freq DESC;' % { 'mk': x_len, 'condition': _condition } _tabkeys = map(int, tabkeys[:_len]) result = self.db.execute(sqlstr, _tabkeys).fetchall() if len(result) > 0: break x_len += 1 return result[:] def get_ime_property(self, attr): '''get IME property from database, attr is the string of property, which should be str.lower() :) ''' if attr not in self.ime_property_cache: sqlstr = 'SELECT val FROM main.ime WHERE attr = ?' 
_result = self.db.execute(sqlstr, (attr,)).fetchall() if _result: self.ime_property_cache[attr] = _result[0][0] else: self.ime_property_cache[attr] = None return self.ime_property_cache[attr] def get_phrase_table_index(self): '''get a list of phrase table columns name''' return self._pt_index[:] def generate_userdb_desc(self): try: sqlstring = 'CREATE TABLE IF NOT EXISTS user_db.desc (name PRIMARY KEY, value);' self.db.executescript(sqlstring) sqlstring = 'INSERT OR IGNORE INTO user_db.desc VALUES (?, ?);' self.db.execute(sqlstring, ('version', '0.4')) sqlstring = 'INSERT OR IGNORE INTO user_db.desc VALUES (?, DATETIME("now", "localtime"));' self.db.execute(sqlstring, ('create-time',)) self.db.commit() except: import traceback traceback.print_exc() def init_user_db(self, db_file): if not path.exists(db_file): db = sqlite3.connect(db_file) db.execute('PRAGMA page_size = 4096;') db.execute('PRAGMA cache_size = 20000;') db.execute('PRAGMA temp_store = MEMORY; ') db.execute('PRAGMA synchronous = OFF; ') db.commit() def get_database_desc(self, db_file): if not path.exists(db_file): return None try: db = sqlite3.connect(db_file) db.execute('PRAGMA page_size = 4096;') db.execute('PRAGMA cache_size = 20000;') db.execute('PRAGMA temp_store = MEMORY; ') db.execute('PRAGMA synchronous = OFF; ') desc = { } for row in db.execute('SELECT * FROM desc;').fetchall(): desc[row[0]] = row[1] self.db.commit() return desc except: path.exists(db_file) return None def get_table_phrase_len(self, db_file): table_patt = re.compile('.*\\((.*)\\)') if not path.exists(db_file): return 0 try: db = sqlite3.connect(db_file) tp_res = db.execute("select sql from sqlite_master where name='phrases';").fetchall() self.db.commit() res = table_patt.match(tp_res[0][0]) if res: tp = res.group(1).split(',') return len(tp) return 0 except: path.exists(db_file) return 0 def cache_goucima(self): self._goucima = { } goucima = self.db.execute('SELECT * FROM main.goucima;').fetchall() (map,)((lambda x: 
self._goucima.update({ x[0]: x[1:] })), goucima) def get_gcm_id(self, zi): '''Get goucima of given character''' if self._goucima: if not isinstance(zi, unicode): zi = zi.decode('utf-8') try: gcds = self._goucima[zi] return gcds sqlstr = 'SELECT %s FROM main.goucima WHERE zi =?;' % ','.join(map((lambda x: 'g%d' % x), range(self._mlen))) return self.db.execute(sqlstr, (zi,)).fetchall()[0] def parse_phrase(self, phrase): '''Parse phrase to get its Table code''' try: phrase = unicode(phrase) except: phrase = phrase.decode('utf8') p_len = len(phrase) tabkeylist = [] if p_len < 2: return [] try: if p_len >= self.rules['above']: rule = self.rules[self.rules['above']] elif p_len in self.rules: rule = self.rules[p_len] else: raise Exception('unsupport len of phrase') if p_len < 2(rule) > self._mlen: raise Exception('fault rule: %s' % rule) p_len < 2(rule) > self._mlen tabkeylist = (None, map)((lambda x: self.get_gcm_id(phrase[x[0] - 1])[x[1] - 1]), rule) return [ len(tabkeylist)] + [ p_len] + tabkeylist[:] + [ phrase] except: p_len < 2 print 'pharse pharse "%s" fail.' 
% phrase.encode('utf-8') def parse_phrase_to_tabkeys(self, phrase): '''Get the Table encoding of the phrase in string form''' try: tabres = self.parse_phrase(phrase)[2:-1] except: tabres = None if tabres: tabkeys = u''.join(map(self.deparse, tabres)) else: tabkeys = u'' return tabkeys def check_phrase(self, phrase, tabkey = None, database = 'main'): if type(phrase) != type(u''): phrase = phrase.decode('utf8') if self.user_can_define_phrase: self.check_phrase_internal(phrase, tabkey, database) else: map(self.check_phrase_internal, phrase) def check_phrase_internal(self, phrase, tabkey = None, database = 'main'): '''Check word freq and user_freq ''' if type(phrase) != type(u''): phrase = phrase.decode('utf8') if self._is_chinese: if phrase in tabdict.chinese_nocheck_chars: return None if len(phrase) >= 2: try: wordattr = self.parse_phrase(phrase) _len = len(wordattr) - 3 return None if not tabkey or len(tabkey) > self._mlen: sqlstr = 'SELECT * FROM (SELECT * FROM main.phrases WHERE phrase = ?\n UNION ALL SELECT * FROM user_db.phrases WHERE phrase = ?\n UNION ALL SELECT * FROM mudb.phrases WHERE phrase = ?)\n ORDER BY user_freq DESC, freq DESC, id ASC;\n ' result = self.db.execute(sqlstr, (phrase, phrase, phrase)).fetchall() else: tabks = self.parse(tabkey) tabkids = tuple(map(int, tabks)) condition = ' and '.join(map((lambda x: 'm%d = ?' % x), range(len(tabks)))) sqlstr = 'SELECT * FROM \n (\n SELECT * FROM main.phrases WHERE phrase = ? and %(cond)s\n UNION ALL SELECT * FROM user_db.phrases WHERE phrase = ? and %(cond)s\n UNION ALL SELECT * FROM mudb.phrases WHERE phrase = ? 
and %(cond)s\n )\n ORDER BY user_freq DESC, freq DESC, id ASC;\n ' % { 'cond': condition } result = self.db.execute(sqlstr, ((phrase,) + tabkids) * 3).fetchall() if not bool(result): sqlstr = 'SELECT * FROM (SELECT * FROM main.phrases WHERE phrase = ?\n UNION ALL SELECT * FROM user_db.phrases WHERE phrase = ?\n UNION ALL SELECT * FROM mudb.phrases WHERE phrase = ?)\n ORDER BY user_freq DESC, freq DESC, id ASC;\n ' result = self.db.execute(sqlstr, (phrase, phrase, phrase)).fetchall() sysdb = { } usrdb = { } mudb = { } searchres = map((lambda res: [ int(res[-2]), int(res[-1]), [ (res[1:-2], [ res[:-1], res[-1]])]]), result) reslist = filter((lambda x: not x[1]), searchres) (map,)((lambda x: sysdb.update(x[2])), reslist) reslist = filter((lambda x: if x[0] in (0, -1): passx[1]), searchres) (map,)((lambda x: usrdb.update(x[2])), reslist) reslist = filter((lambda x: if x[0] not in (0, -1): passx[1]), searchres) (map,)((lambda x: mudb.update(x[2])), reslist) tabkey = '' if len(phrase) >= 2: tabkey = u''.join(map(self.deparse, wordattr[2:2 + _len])) if self._is_chinese: sqlstr = 'UPDATE mudb.phrases SET user_freq = ? WHERE mlen = ? AND clen = ? %s AND category = ? AND phrase = ?;' else: sqlstr = 'UPDATE mudb.phrases SET user_freq = ? WHERE mlen = ? AND clen = ? %s AND phrase = ?;' try: if len(phrase) == 1: if not self.dynamic_adjust: return None keyout = (filter,)((lambda k: mudb.has_key(k)), usrdb.keys()) map(usrdb.pop, keyout) keyout = (self.dynamic_adjust, filter)((lambda k: if not mudb.has_key(k): passusrdb.has_key(k)), sysdb.keys()) map(sysdb.pop, keyout) (None, None, map)((lambda res: self.db.execute(sqlstr % ''.join(map((lambda x: 'AND m%d = ? 
' % x), range(res[0]))), [ mudb[res][1] + 1] + list(res[:2 + res[0]]) + list(res[2 + self._mlen:])) ), mudb.keys()) self.db.commit() (None, None, map)((lambda res: self.add_phrase((''.join(map(self.deparse, res[2:2 + int(res[0])])), phrase, 1, usrdb[res][1] + 1), database = 'mudb')), usrdb.keys()) (None, map)((lambda res: self.add_phrase((''.join(map(self.deparse, res[2:2 + int(res[0])])), phrase, 2, 1), database = 'mudb')), sysdb.keys()) elif len(result) == 0 and self.user_can_define_phrase: self.add_phrase((tabkey, phrase, -2, 1), database = 'mudb') elif len(result) > 0: if not self.dynamic_adjust: return None keyout = (filter,)((lambda k: mudb.has_key(k)), usrdb.keys()) map(usrdb.pop, keyout) keyout = (self.dynamic_adjust, filter)((lambda k: if not mudb.has_key(k): passusrdb.has_key(k)), sysdb.keys()) map(sysdb.pop, keyout) (None, None, map)((lambda res: self.db.execute(sqlstr % ''.join(map((lambda x: 'AND m%d = ? ' % x), range(res[0]))), [ mudb[res][1] + 1] + list(res[:2 + res[0]]) + list(res[2 + self._mlen:])) ), mudb.keys()) self.db.commit() (None, None, map)((lambda res: None((self.add_phrase, ''.join(map(self.deparse, res[2:2 + int(res[0])])), phrase if usrdb[res][0][-1] == -1 else 1, usrdb[res][1] + 1), database = 'mudb')), usrdb.keys()) (None, map)((lambda res: self.add_phrase((''.join(map(self.deparse, res[2:2 + int(res[0])])), phrase, 2, 1), database = 'mudb')), sysdb.keys()) except: import traceback traceback.print_exc() def find_zi_code(self, zi): '''Check word freq and user_freq ''' zi = zi.decode('utf8') sqlstr = 'SELECT * FROM main.phrases WHERE phrase = ?\n ORDER BY mlen ASC;\n' result = self.db.execute(sqlstr, (zi,)).fetchall() codes = [] try: if result: for _res in result: tabkey = u'' for i in range(int(_res[1])): tabkey += self.deparse(_res[3 + i]) codes.append(tabkey) except: import traceback traceback.print_exc() return codes[:] def remove_phrase(self, phrase, database = 'user_db'): '''Remove phrase from database, default is from user_db 
phrase should be the a row of select * result from database Like (id, mlen,clen,m0,m1,m2,m3,phrase,freq,user_freq) ''' _ph = list(phrase[:-2]) _condition = '' for i in range(_ph[1]): _condition += 'AND m%d = ? ' % i nn = _ph.count(None) if nn: for i in range(nn): _ph.remove(None) if self._is_chinese: msqlstr = 'SELECT * FROM %(database)s.phrases WHERE mlen = ? and clen = ? %(condition)s AND category = ? AND phrase = ? ;' % { 'database': database, 'condition': _condition } else: msqlstr = 'SELECT * FROM %(database)s.phrases WHERE mlen = ? and clen = ? %(condition)s AND phrase = ? ;' % { 'database': database, 'condition': _condition } if self.db.execute(msqlstr, _ph[1:]).fetchall(): if self._is_chinese: sqlstr = 'DELETE FROM %(database)s.phrases WHERE mlen = ? AND clen =? %(condition)s AND category = ? AND phrase = ? ;' % { 'database': database, 'condition': _condition } else: sqlstr = 'DELETE FROM %(database)s.phrases WHERE mlen = ? AND clen =? %(condition)s AND phrase = ? ;' % { 'database': database, 'condition': _condition } self.db.execute(sqlstr, _ph[1:]) self.db.commit() if self._is_chinese: msqlstr = 'SELECT * FROM mudb.phrases WHERE mlen = ? and clen = ? %(condition)s AND category = ? AND phrase = ? ;' % { 'condition': _condition } else: msqlstr = 'SELECT * FROM mudb.phrases WHERE mlen = ? and clen = ? %(condition)s AND phrase = ? ;' % { 'condition': _condition } if self.db.execute(msqlstr, _ph[1:]).fetchall(): if self._is_chinese: sqlstr = 'DELETE FROM mudb.phrases WHERE mlen = ? AND clen =? %(condition)s AND category = ? AND phrase = ? ;' % { 'condition': _condition } else: sqlstr = 'DELETE FROM mudb.phrases WHERE mlen = ? AND clen =? %(condition)s AND phrase = ? 
;' % { 'condition': _condition } self.db.execute(sqlstr, _ph[1:]) self.db.commit() def extra_user_phrases(self, udb, only_defined = False): '''extract user phrases from database''' try: db = sqlite3.connect(udb) except: return None if only_defined: _phrases = db.execute('SELECT clen, phrase, freq, sum(user_freq) FROM phrases WHERE freq=-1 AND mlen != 0 GROUP BY clen,phrase;').fetchall() else: _phrases = db.execute('SELECT clen, phrase, freq, sum(user_freq) FROM phrases WHERE mlen !=0 GROUP BY clen,phrase;').fetchall() db.commit() return _phrases[:]